R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(Seurat)
## Attaching SeuratObject
bt4_doublet <- readRDS(file='processed/bt4_doublet')
bt5_doublet <- readRDS(file='processed/bt5_doublet')
bt6_doublet <- readRDS(file='processed/bt6_doublet')

MT genes

define chloroplast genes

bt4_doublet[["percent.ch"]] <- PercentageFeatureSet(bt4_doublet, pattern = "^ATC")
bt5_doublet[["percent.ch"]] <- PercentageFeatureSet(bt5_doublet, pattern = "^ATC")
bt6_doublet[["percent.ch"]] <- PercentageFeatureSet(bt6_doublet, pattern = "^ATC")

visualziation the proportion of mt and ch genes

VlnPlot(bt4_doublet, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.ch"), ncol = 4)

VlnPlot(bt5_doublet, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.ch"), ncol = 4)

VlnPlot(bt6_doublet, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.ch"), ncol = 4)

remove the the MT and ch GENES

bt4_QC <- subset(bt4_doublet, subset = nFeature_RNA > 200 & nFeature_RNA < 40000 & percent.mt < 10 & percent.ch<10)
bt5_QC <- subset(bt5_doublet, subset = nFeature_RNA > 200 & nFeature_RNA < 40000 & percent.mt < 10 & percent.ch<10)
bt6_QC <- subset(bt6_doublet, subset = nFeature_RNA > 200 & nFeature_RNA < 40000 & percent.mt < 10 & percent.ch<10)

VlnPlot(bt4_QC, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.ch"), ncol = 4)

VlnPlot(bt5_QC, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.ch"), ncol = 4)

VlnPlot(bt6_QC, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.ch"), ncol = 4)

remove the doublet, using subset function

bt4_QC<-subset(bt4_QC, subset=DF.classifications_0.25_0.09_366=="Singlet")
bt5_QC<-subset(bt5_QC, subset=DF.classifications_0.25_0.09_682=="Singlet")
bt6_QC<-subset(bt6_QC, subset=DF.classifications_0.25_0.09_203=="Singlet")

investigate if any of them enriched by protoplast genes

# loading protoplast induced genes

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
feature_list<-read_tsv("lib/features.tsv")
## Registered S3 method overwritten by 'cli':
##   method     from         
##   print.boxx spatstat.geom
## Rows: 32833 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): locus, gene_name, Gene Expression
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
protoplastx<-read.csv("raw_data/QC_genes/protoplast.csv")
proto<-protoplastx%>%
  filter(log2FoldChange>2, padj<0.05)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ stringr 1.4.0
## ✓ tidyr   1.1.4     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
proto_id<-inner_join(proto, feature_list, by=c("gene"="locus"))

#RidgePlot(bt4_QC, features = proto_id$gene_name, ncol = 1)
#RidgePlot(bt5_QC, features = proto_id$gene_name, ncol = 1)
#RidgePlot(bt6_QC, features = proto_id$gene_name, ncol = 1)

# I need to output all the features from the objectives and merge with prtoplast genes
protoplast<-proto_id%>%
  filter(gene_name%in%rownames(bt4_QC))%>%
  filter(gene_name%in%rownames(bt5_QC))%>%
  filter(gene_name%in%rownames(bt6_QC))
bt4_QC[["percent.proto"]] <- PercentageFeatureSet(bt4_QC, features = protoplast$gene_name)
bt5_QC[["percent.proto"]] <- PercentageFeatureSet(bt5_QC, features = protoplast$gene_name)
bt6_QC[["percent.proto"]] <- PercentageFeatureSet(bt6_QC, features = protoplast$gene_name)

VlnPlot(bt4_QC, features = c("nFeature_RNA", "percent.proto"), ncol = 2)+ylim(c(0,100))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

VlnPlot(bt5_QC, features = c("nFeature_RNA", "percent.proto"), ncol = 2)+ylim(c(0,100))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

VlnPlot(bt6_QC, features = c("nFeature_RNA",  "percent.proto"), ncol = 2)+ylim(c(0,100))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

remove columns derived from doublet detection

# for bt4
bt4_QC$pANN_0.25_0.09_366 <- NULL
bt4_QC$DF.classifications_0.25_0.09_366 <- NULL
bt4_QC$percent.mt <- NULL
bt4_QC$percent.ch <- NULL

# for bt5
bt5_QC$pANN_0.25_0.09_682 <- NULL
bt5_QC$DF.classifications_0.25_0.09_682 <- NULL
bt5_QC$percent.mt <- NULL
bt5_QC$percent.ch <- NULL

# for bt6
bt6_QC$pANN_0.25_0.09_203 <- NULL
bt6_QC$DF.classifications_0.25_0.09_203 <- NULL
bt6_QC$percent.mt <- NULL
bt6_QC$percent.ch <- NULL

save the objects

saveRDS(bt4_QC, file='processed/bt4_QC')
saveRDS(bt5_QC, file='processed/bt5_QC')
saveRDS(bt6_QC, file='processed/bt6_QC')